# loading required libraries
library(AER) 
library(broom)  
library(dplyr)
library(ggplot2)

# loading data
panel <- read.csv("/Users/johnkim/brewery_panel_20250307.csv")

# making the portion into percentage
panel$lagged_perage25 <- panel$lagged_perage25 * 100
panel$lagged_perage35 <- panel$lagged_perage35 * 100
panel$lagged_perage45 <- panel$lagged_perage45 * 100

# splitting the data by year
for (i in 2002:2021) {
  year_data <- panel %>%
    filter(year == i)
  assign(paste0("df_", i), year_data)
}

# iterating regression over years
for (year in 2003:2020) {
  df_name <- paste0("df_", year)
  formula_brewery <- as.formula(paste("brewery ~ brewery_w + lagged_super + lagged_drink +lagged_pop +",
                                      "lagged_popdens + ln_lagged_pci + lagged_white + lagged_bach + univ_ex +",
                                      "lagged_perage25 + lagged_perage35 + lagged_perage45 + lagged_homebrew +",
                                      "lagged_growler + lagged_dtc + lagged_self + lagged_sampling + lagged_sunday",
                                      "|",
                                      "lagged_super + lagged_drink +lagged_pop +",
                                      "lagged_popdens + ln_lagged_pci + lagged_white + lagged_bach + univ_ex +",
                                      "lagged_perage25 + lagged_perage35 + lagged_perage45 + lagged_homebrew +",
                                      "lagged_growler + lagged_dtc + lagged_self + lagged_sampling + lagged_sunday +",
                                      "lagged_popdens_w + ln_lagged_pci_w + lagged_perwhite_w + lagged_perbach_w + univ_ex_w +",
                                      "lagged_perage25_w + lagged_perage35_w + lagged_perage45_w + lagged_homebrew_w +",
                                      "lagged_growler_w + lagged_dtc_w + lagged_selfdist_w + lagged_sampling_w + lagged_sunday_w"))
  
  iv_model <- ivreg(formula_brewery, data = get(df_name))
  # storing the results
  results_list[[as.character(year)]] <- tidy(iv_model) %>%
    mutate(year = year)
}

# just the 2020 results from the list
results_2020 <- results_list[["2020"]]

# the coefficients and standard errors
results_2020 %>%
  select(term, estimate, std.error) %>%
  mutate(
    estimate = round(estimate, 8),
    std.error = round(std.error, 8)
  ) %>%
  knitr::kable()

# combining all results
all_results <- bind_rows(results_list)

# selecting policy variables of interest (you may need to adjust this list)
policy_vars <- c("lagged_homebrew", "lagged_growler", "lagged_dtc", "lagged_self", "lagged_sampling", "lagged_sunday", "univ_ex", "brewery_w")
policy_name_map <- c(
  "lagged_homebrew" = "Homebrew Law",
  "lagged_growler" = "Growler Law",
  "lagged_dtc" = "Direct-to-Consumer Law",
  "lagged_self" = "Self-Distribution Law",
  "lagged_sampling" = "Sampling Law",
  "lagged_sunday" = "Sunday Sales Law",
  "univ_ex" = "University Presence",
  "brewery_w" = "Brewery Spatial Lags"
)

# updating the policy_vars vector
policy_vars <- names(policy_name_map)

policy_results <- all_results %>%
  filter(term %in% policy_vars) %>%
  mutate(term = factor(term, levels = policy_vars, labels = policy_name_map[policy_vars]))

# creating the coefficient plot with updated names
ggplot(policy_results, aes(x = year, y = estimate, color = term)) +
  geom_line() +
  geom_point() +
  geom_errorbar(aes(ymin = estimate - std.error, ymax = estimate + std.error), width = 0.2) +
  facet_wrap(~ term, scales = "free_y", ncol = 2) +
  theme_minimal() +
  labs(
    x = "Year",
    y = "Coefficient Estimate"
  ) +
  theme(
    legend.position = "none",  
    strip.text = element_text(size = 14),  
    axis.title = element_text(size = 12),  
    axis.text = element_text(size = 12),
    panel.background = element_rect(fill = "gray95", color = NA), 
    plot.background = element_rect(fill = "white", color = NA),  
    panel.grid.major = element_line(color = "white"),  
    panel.grid.minor = element_line(color = "gray90")  
  )

# saving the plot
ggsave("brewery_20250302.png", plot = last_plot(), dpi = 300, bg = "white", width = 10, height = 8)

# Style (DV)
# creating a list to store results
results_list <- list()

# iterating over years
for (year in 2003:2020) {
  df_name <- paste0("df", year)
  
  # running the regression
  formula_style <- as.formula(paste("stylecount ~ style_w + lagged_super + lagged_drink +lagged_pop +",
                                    "lagged_popdens + ln_lagged_pci + lagged_white + lagged_bach + univ_ex +",
                                    "lagged_perage25 + lagged_perage35 + lagged_perage45 + lagged_homebrew +",
                                    "lagged_growler + lagged_dtc + lagged_self + lagged_sampling + lagged_sunday",
                                    "|",
                                    "lagged_super + lagged_drink +lagged_pop +",
                                    "lagged_popdens + ln_lagged_pci + lagged_white + lagged_bach + univ_ex +",
                                    "lagged_perage25 + lagged_perage35 + lagged_perage45 + lagged_homebrew +",
                                    "lagged_growler + lagged_dtc + lagged_self + lagged_sampling + lagged_sunday +",
                                    "lagged_popdens_w + ln_lagged_pci_w + lagged_perwhite_w + lagged_perbach_w + univ_ex_w +",
                                    "lagged_perage25_w + lagged_perage35_w + lagged_perage45_w + lagged_homebrew_w +",
                                    "lagged_growler_w + lagged_dtc_w + lagged_selfdist_w + lagged_sampling_w + lagged_sunday_w"))
  
  iv_model <- ivreg(formula_style, data = get(df_name))
  
  # storing the results
  results_list[[as.character(year)]] <- tidy(iv_model) %>%
    mutate(year = year)
}

# just the 2020 results from the list
results_2020 <- results_list[["2020"]]

results_2020 %>%
  select(term, estimate, std.error) %>%
  mutate(
    estimate = round(estimate, 8),
    std.error = round(std.error, 8)
  ) %>%
  knitr::kable()

all_results <- bind_rows(results_list)

# selecting policy variables of interest (you may need to adjust this list)
policy_vars <- c("lagged_homebrew", "lagged_growler", "lagged_dtc", "lagged_self", "lagged_sampling", "lagged_sunday", "univ_ex", "style_w")
policy_name_map <- c(
  "lagged_homebrew" = "Homebrew Law",
  "lagged_growler" = "Growler Law",
  "lagged_dtc" = "Direct-to-Consumer Law",
  "lagged_self" = "Self-Distribution Law",
  "lagged_sampling" = "Sampling Law",
  "lagged_sunday" = "Sunday Sales Law",
  "univ_ex" = "University Presence",
  "style_w" = "Beer Style Spatial Lags"
)

policy_vars <- names(policy_name_map)

policy_results <- all_results %>%
  filter(term %in% policy_vars) %>%
  mutate(term = factor(term, levels = policy_vars, labels = policy_name_map[policy_vars]))

# the coefficient plot with updated names
ggplot(policy_results, aes(x = year, y = estimate, color = term)) +
  geom_line() +
  geom_point() +
  geom_errorbar(aes(ymin = estimate - std.error, ymax = estimate + std.error), width = 0.2) +
  facet_wrap(~ term, scales = "free_y", ncol = 2) +
  theme_minimal() +
  labs(
    x = "Year",
    y = "Coefficient Estimate"
  ) +
  theme(
    legend.position = "none", 
    strip.text = element_text(size = 14),  
    axis.title = element_text(size = 12),  
    axis.text = element_text(size = 12),
    panel.background = element_rect(fill = "gray95", color = NA),  
    plot.background = element_rect(fill = "white", color = NA),  
    panel.grid.major = element_line(color = "white"), 
    panel.grid.minor = element_line(color = "gray90") 
  )
# Save the plot
ggsave("style_20250302.png", plot = last_plot(), dpi = 300, bg = "white", width = 10, height = 8)

# Beer count (DV)
results_list <- list() 

# iterating the regression over years
for (year in 2003:2020) {
  df_name <- paste0("df", year)
  
  # running the regression
  formula <- as.formula(paste("beercount ~ beer_w + lagged_super + lagged_drink +lagged_pop +",
                              "lagged_popdens + ln_lagged_pci + lagged_white + lagged_bach + univ_ex +",
                              "lagged_perage25 + lagged_perage35 + lagged_perage45 + lagged_homebrew +",
                              "lagged_growler + lagged_dtc + lagged_self + lagged_sampling + lagged_sunday",
                              "|",
                              "lagged_super + lagged_drink +lagged_pop +",
                              "lagged_popdens + ln_lagged_pci + lagged_white + lagged_bach + univ_ex +",
                              "lagged_perage25 + lagged_perage35 + lagged_perage45 + lagged_homebrew +",
                              "lagged_growler + lagged_dtc + lagged_self + lagged_sampling + lagged_sunday +",
                              "lagged_popdens_w + ln_lagged_pci_w + lagged_perwhite_w + lagged_perbach_w + univ_ex_w +",
                              "lagged_perage25_w + lagged_perage35_w + lagged_perage45_w + lagged_homebrew_w +",
                              "lagged_growler_w + lagged_dtc_w + lagged_selfdist_w + lagged_sampling_w + lagged_sunday_w"))
  
  iv_model <- ivreg(formula, data = get(df_name))
  
  results_list[[as.character(year)]] <- tidy(iv_model) %>%
    mutate(year = year)
}

results_2020 <- results_list[["2020"]]

results_2020 %>%
  select(term, estimate, std.error) %>%
  mutate(
    estimate = round(estimate, 8),
    std.error = round(std.error, 8)
  ) %>%
  knitr::kable()

all_results <- bind_rows(results_list)

policy_vars <- c("lagged_homebrew", "lagged_growler", "lagged_dtc", "lagged_self", "lagged_sampling", "lagged_sunday", "univ_ex", "beer_w")

policy_name_map <- c(
  "lagged_homebrew" = "Homebrew Law",
  "lagged_growler" = "Growler Law",
  "lagged_dtc" = "Direct-to-Consumer Law",
  "lagged_self" = "Self-Distribution Law",
  "lagged_sampling" = "Sampling Law",
  "lagged_sunday" = "Sunday Sales Law",
  "univ_ex" = "University Presence",
  "beer_w" = "Beer Spatial Lags"
)

policy_vars <- names(policy_name_map)

policy_results <- all_results %>%
  filter(term %in% policy_vars) %>%
  mutate(term = factor(term, levels = policy_vars, labels = policy_name_map[policy_vars]))

# creating the coefficient plot 
ggplot(policy_results, aes(x = year, y = estimate, color = term)) +
  geom_line() +
  geom_point() +
  geom_errorbar(aes(ymin = estimate - std.error, ymax = estimate + std.error), width = 0.2) +
  facet_wrap(~ term, scales = "free_y", ncol = 2) +
  theme_minimal() +
  labs(
    x = "Year",
    y = "Coefficient Estimate"
  ) +
  theme(
    legend.position = "none",  
    strip.text = element_text(size = 14),  
    axis.title = element_text(size = 12),  
    axis.text = element_text(size = 12),
    panel.background = element_rect(fill = "gray95", color = NA), 
    plot.background = element_rect(fill = "white", color = NA),   
    panel.grid.major = element_line(color = "white"), 
    panel.grid.minor = element_line(color = "gray90"))

# saving the plot
ggsave("beer_20250302.png", plot = last_plot(), dpi = 300, bg = "white", width = 10, height = 8)

